from utils import paddle_aux
import os
import paddle
"""
Train a YOLOv5 model on a custom dataset.

Models and datasets download automatically from the latest YOLOv5 release.
Models: https://github.com/ultralytics/yolov5/tree/master/models
Datasets: https://github.com/ultralytics/yolov5/tree/master/data
Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data

Usage:
    $ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640  # from pretrained (RECOMMENDED)
    $ python path/to/train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640  # from scratch
"""
import argparse
import math
import random
import sys
import time
from copy import deepcopy
from datetime import datetime
from pathlib import Path
import numpy as np
import yaml
from tqdm import tqdm
from utils.cawb import CosineAnnealingWarmbootingLR
# Absolute path of this script and the directory that contains it.
FILE = Path(__file__).resolve()
ROOT = FILE.parent
# Make the script directory importable so the sibling packages imported
# further down (models, utils, val) resolve regardless of the caller's cwd.
if sys.path.count(str(ROOT)) == 0:
    sys.path.append(str(ROOT))
# From here on ROOT is expressed relative to the current working directory.
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))
import val
from models.experimental import attempt_load
from models.yolo import Model
from utils.autoanchor import check_anchors
from utils.autobatch import check_train_batch_size
from utils.callbacks import Callbacks
from utils.datasets import create_dataloader
from utils.downloads import attempt_download
from utils.general import LOGGER, check_dataset, check_file, check_git_status, check_img_size, check_requirements, check_suffix, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer
from utils.loggers import Loggers
from utils.loggers.wandb.wandb_utils import check_wandb_resume
from utils.loss import ComputeLoss
from utils.metrics import fitness
from utils.plots import plot_evolve, plot_labels
from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first
# Distributed-launch settings read from the environment. The fallbacks
# (-1, -1, 1) are what the rest of this file treats as a single-process run.
LOCAL_RANK = int(os.environ.get('LOCAL_RANK', -1))
RANK = int(os.environ.get('RANK', -1))
WORLD_SIZE = int(os.environ.get('WORLD_SIZE', 1))


def train(hyp, opt, device, callbacks):
    """Run one full training session and return the last validation results.

    Args:
        hyp: hyperparameter dict, or a path to a YAML file that is loaded
            into such a dict. The dict is modified in place (weight decay and
            the box/cls/obj gains are rescaled, label_smoothing is set).
        opt: option namespace (see ``parse_opt``); ``opt.save_dir`` must be set.
        device: device object; ``device.type`` is compared against 'cpu'.
        callbacks: object providing ``register_action``, ``run`` and
            ``stop_training``.

    Returns:
        ``results`` - initialised to a 7-tuple of zeros and overwritten by
        ``val.run`` whenever validation runs. Returns ``None`` if
        ``callbacks.stop_training`` becomes true after a batch.

    NOTE(review): this file looks machine-converted from PyTorch to Paddle;
    several calls below are flagged by the converter itself or look
    unconverted. They are marked with NOTE(review) and need confirmation
    against the Paddle API before this function can be expected to run.
    """
    # Unpack the options used throughout the function.
    (save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg,
        resume, noval, nosave, workers, freeze) = (Path(opt.save_dir), opt.
        epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve,
        opt.data, opt.cfg, opt.resume, opt.noval, opt.nosave, opt.workers,
        opt.freeze)
    # Checkpoint directory; when evolving only save_dir itself is created.
    w = save_dir / 'weights'
    (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)
    last, best = w / 'last.pt', w / 'best.pt'
    # Hyperparameters may arrive as a YAML path.
    if isinstance(hyp, str):
        with open(hyp, errors='ignore') as f:
            hyp = yaml.safe_load(f)
    LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k,
        v in hyp.items()))
    # Persist the run settings next to the results (skipped while evolving).
    if not evolve:
        with open(save_dir / 'hyp.yaml', 'w') as f:
            yaml.safe_dump(hyp, f, sort_keys=False)
        with open(save_dir / 'opt.yaml', 'w') as f:
            yaml.safe_dump(vars(opt), f, sort_keys=False)
    # Loggers exist only on rank -1/0; `loggers` is undefined on other ranks.
    data_dict = None
    if RANK in [-1, 0]:
        loggers = Loggers(save_dir, weights, opt, hyp, LOGGER)
        if loggers.wandb:
            data_dict = loggers.wandb.data_dict
            if resume:
                # Re-read these from opt when resuming with W&B active
                # (presumably the logger may have updated opt - confirm).
                weights, epochs, hyp, batch_size = (opt.weights, opt.epochs,
                    opt.hyp, opt.batch_size)
        # Expose every logger method as a callback action of the same name.
        for k in methods(loggers):
            callbacks.register_action(k, callback=getattr(loggers, k))
    plots = not evolve
    cuda = device.type != 'cpu'
    init_seeds(1 + RANK)
    # Dataset description: reuse the W&B one if present, else check `data`.
    with torch_distributed_zero_first(LOCAL_RANK):
        data_dict = data_dict or check_dataset(data)
    train_path, val_path = data_dict['train'], data_dict['val']
    nc = 1 if single_cls else int(data_dict['nc'])
    names = ['item'] if single_cls and len(data_dict['names']
        ) != 1 else data_dict['names']
    assert len(names
        ) == nc, f'{len(names)} names found for nc={nc} dataset in {data}'
    is_coco = isinstance(val_path, str) and val_path.endswith(
        'coco/val2017.txt')
    # Build the model, optionally initialised from a '.pt' checkpoint.
    check_suffix(weights, '.pt')
    pretrained = weights.endswith('.pt')
    if pretrained:
        with torch_distributed_zero_first(LOCAL_RANK):
            weights = attempt_download(weights)
        ckpt = paddle.load(path=str(weights))
        model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.
            get('anchors')).to(device)
        # Anchor entries are not transferred when cfg/hyp supply anchors,
        # unless resuming.
        exclude = ['anchor'] if (cfg or hyp.get('anchors')
            ) and not resume else []
        csd = ckpt['model'].astype(dtype='float32').state_dict()
        csd = intersect_dicts(csd, model.state_dict(), exclude=exclude)
        model.set_state_dict(state_dict=csd)
        LOGGER.info(
            f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}'
            )
    else:
        model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)
    # Freeze: several values are taken as explicit layer indices, a single
    # value N as range(N). Matching is by 'model.<idx>.' substring.
    freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(
        freeze[0]))]
    for k, v in model.named_parameters():
        # `not True` evaluates to False: every parameter is trainable first.
        v.stop_gradient = not True
        if any(x in k for x in freeze):
            LOGGER.info(f'freezing {k}')
            # `not False` evaluates to True: gradients stopped for this one.
            v.stop_gradient = not False
    # Grid size is at least 32; the image size is checked against it.
    gs = max(int(model.stride.max()), 32)
    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)
    # batch_size == -1 requests automatic batch sizing (single process only).
    if RANK == -1 and batch_size == -1:
        batch_size = check_train_batch_size(model, imgsz)
        loggers.on_params_update({'batch_size': batch_size})
    # Training dataloader; a cache setting of 'val' disables the train cache.
    train_loader, dataset = create_dataloader(train_path, imgsz, batch_size //
        WORLD_SIZE, gs, single_cls, hyp=hyp, augment=True, cache=None if 
        opt.cache == 'val' else opt.cache, rect=opt.rect, rank=LOCAL_RANK,
        workers=workers, image_weights=opt.image_weights, quad=opt.quad,
        prefix=colorstr('train: '), shuffle=True)
    # Largest value found in column 0 of the labels; must be a valid class.
    mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max())
    nb = len(train_loader)
    assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'
    # Reference batch size used to derive gradient accumulation and to
    # rescale weight decay.
    nbs = 64
    accumulate = max(round(nbs / batch_size), 1)
    hyp['weight_decay'] *= batch_size * accumulate / nbs
    LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}")
    # Parameter groups: g0 = BatchNorm2D weights, g1 = other weights,
    # g2 = biases.
    g0, g1, g2 = [], [], []
    for v in model.sublayers():
        # NOTE(review): the second isinstance argument here and below is the
        # attribute `EagerParamBase.from_tensor`, not a class; isinstance
        # expects a type. Looks like a conversion artifact - confirm.
        if hasattr(v, 'bias') and isinstance(v.bias, paddle.base.framework.
            EagerParamBase.from_tensor):
            g2.append(v.bias)
        if isinstance(v, paddle.nn.BatchNorm2D):
            g0.append(v.weight)
        elif hasattr(v, 'weight') and isinstance(v.weight, paddle.base.
            framework.EagerParamBase.from_tensor):
            g1.append(v.weight)
    # Optimizer is created on g0; g1 and g2 are added as groups afterwards.
    # The (momentum, 0.999)[i] indexing just selects beta1 / beta2.
    if opt.optimizer == 'Adam':
        optimizer = paddle.optimizer.Adam(parameters=g0, learning_rate=hyp[
            'lr0'], beta1=(hyp['momentum'], 0.999)[0], beta2=(hyp[
            'momentum'], 0.999)[1], weight_decay=0.0)
    elif opt.optimizer == 'AdamW':
        optimizer = paddle.optimizer.AdamW(parameters=g0, learning_rate=hyp
            ['lr0'], beta1=(hyp['momentum'], 0.999)[0], beta2=(hyp[
            'momentum'], 0.999)[1], weight_decay=0.0)
    else:
        # NOTE(review): `lr=`, `momentum=` and `nesterov=` look like
        # unconverted PyTorch keywords - confirm against paddle.optimizer.
        optimizer = paddle.optimizer.SGD(g0, lr=hyp['lr0'], momentum=hyp[
            'momentum'], nesterov=True)
    # NOTE(review): the two bare strings below are converter notes stating
    # that `add_param_group` could not be converted; manual porting is
    # still pending for both calls.
    """Class Method: *.add_param_group, can not convert, please check whether it is torch.Tensor.*/Optimizer.*/nn.Module.*/torch.distributions.Distribution.*/torch.autograd.function.FunctionCtx.*/torch.profiler.profile.*/torch.autograd.profiler.profile.*, and convert manually"""
    optimizer.add_param_group({'params': g1, 'weight_decay': hyp[
        'weight_decay']})
    """Class Method: *.add_param_group, can not convert, please check whether it is torch.Tensor.*/Optimizer.*/nn.Module.*/torch.distributions.Distribution.*/torch.autograd.function.FunctionCtx.*/torch.profiler.profile.*/torch.autograd.profiler.profile.*, and convert manually"""
    optimizer.add_param_group({'params': g2})
    LOGGER.info(
        f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups {len(g0)} weight (no decay), {len(g1)} weight, {len(g2)} bias"
        )
    # LR schedule: cosine curve from 1 down to hyp['lrf'] handed to the
    # project's warm-rebooting scheduler, or a linear ramp via LambdaDecay.
    if opt.cos_lr:
        lf = lambda x, y=opt.epochs: ((1 + math.cos(x * math.pi / y)) / 2
            ) ** 1.0 * (1.0 - hyp['lrf']) + hyp['lrf']
        scheduler = CosineAnnealingWarmbootingLR(optimizer, epochs=opt.
            epochs, steps=opt.cawb_steps, step_scale=0.7, lf=lf, batchs=len
            (train_loader), warmup_epoch=hyp['warmup_epochs'], epoch_scale=4.0)
    else:
        lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf']
        tmp_lr = paddle.optimizer.lr.LambdaDecay(lr_lambda=lf,
            learning_rate=optimizer.get_lr())
        optimizer.set_lr_scheduler(tmp_lr)
        scheduler = tmp_lr
    # ModelEMA wrapper only on rank -1/0.
    ema = ModelEMA(model) if RANK in [-1, 0] else None
    # Restore optimizer / EMA / epoch counters from the checkpoint, if any.
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        if ckpt['optimizer'] is not None:
            optimizer.set_state_dict(state_dict=ckpt['optimizer'])
            best_fitness = ckpt['best_fitness']
        if ema and ckpt.get('ema'):
            # NOTE(review): `load_state_dict` is used here while the model
            # above uses `set_state_dict` - confirm which one ema.ema has.
            ema.ema.load_state_dict(ckpt['ema'].astype(dtype='float32').
                state_dict())
            ema.updates = ckpt['updates']
        start_epoch = ckpt['epoch'] + 1
        if resume:
            assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.'
        if epochs < start_epoch:
            LOGGER.info(
                f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs."
                )
            epochs += ckpt['epoch']
        del ckpt, csd
    # Single-process multi-GPU: wrap in DataParallel (with a warning).
    if cuda and RANK == -1 and paddle.device.cuda.device_count() > 1:
        LOGGER.warning(
            """WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.
See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started."""
            )
        model = paddle.DataParallel(layers=model)
    # SyncBatchNorm is applied only in distributed mode (RANK != -1).
    if opt.sync_bn and cuda and RANK != -1:
        model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(layer=model).to(
            device)
        LOGGER.info('Using SyncBatchNorm()')
    if RANK in [-1, 0]:
        # Validation loader uses twice the per-process batch size and twice
        # the workers; only element [0] of the returned pair is kept.
        val_loader = create_dataloader(val_path, imgsz, batch_size //
            WORLD_SIZE * 2, gs, single_cls, hyp=hyp, cache=None if noval else
            opt.cache, rect=True, rank=-1, workers=workers * 2, pad=0.5,
            prefix=colorstr('val: '))[0]
        if not resume:
            labels = np.concatenate(dataset.labels, 0)
            if plots:
                plot_labels(labels, names, save_dir)
            if not opt.noautoanchor:
                check_anchors(dataset, model=model, thr=hyp['anchor_t'],
                    imgsz=imgsz)
            # Round-trip through float16 and back to float32; the result is
            # not assigned (presumably relies on in-place casting - confirm).
            model.astype(dtype='float16').astype(dtype='float32')
        callbacks.run('on_pretrain_routine_end')
    # Distributed mode: wrap the model for multi-process training.
    if cuda and RANK != -1:
       # NOTE(review): `device_ids` / `output_device` look like PyTorch DDP
       # keywords - confirm paddle.DataParallel accepts them.
       model = paddle.DataParallel(model, device_ids
            =[LOCAL_RANK], output_device=LOCAL_RANK)
    # `nl` is read from the last layer of the unwrapped model (presumably
    # the number of detection layers - confirm); the loss gains below are
    # rescaled by it, by the class count and by the image size.
    nl = de_parallel(model).model[-1].nl
    hyp['box'] *= 3 / nl
    hyp['cls'] *= nc / 80 * 3 / nl
    hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl
    hyp['label_smoothing'] = opt.label_smoothing
    # Attach training metadata to the model object.
    model.nc = nc
    model.hyp = hyp
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device
        ) * nc
    """Class Attribute: torch.Tensor.names, can not convert, please check whether it is torch.Tensor.*/torch.autograd.function.FunctionCtx.*/torch.distributions.Distribution.* and convert manually"""
    model.names = names
    t0 = time.time()
    # Number of warmup iterations: warmup_epochs worth of batches, min 100.
    nw = max(round(hyp['warmup_epochs'] * nb), 100)
    last_opt_step = -1
    maps = np.zeros(nc)
    results = 0, 0, 0, 0, 0, 0, 0
    # Align the scheduler with the epoch training starts (or resumes) at.
    scheduler.last_epoch = start_epoch - 1
    # Loss scaling / mixed precision are enabled only when running on CUDA.
    scaler = paddle.amp.GradScaler(enable=cuda, incr_every_n_steps=2000,
        init_loss_scaling=65536.0)
    stopper = EarlyStopping(patience=opt.patience)
    compute_loss = ComputeLoss(model)
    LOGGER.info(
        f"""Image sizes {imgsz} train, {imgsz} val
Using {train_loader.num_workers * WORLD_SIZE} dataloader workers
Logging results to {colorstr('bold', save_dir)}
Starting training for {epochs} epochs..."""
        )
    for epoch in range(start_epoch, epochs):
        # Halfway through a fresh (non-resumed) run the optimizer is replaced
        # by SGD with hard-coded lr=0.01 / momentum=0.937 and the scheduler
        # is rebuilt.
        # NOTE(review): `opt.epochs / 2` is a float, so this branch is never
        # taken when opt.epochs is odd; g0/g1/g2 are deleted inside it.
        if epoch == opt.epochs / 2 and not resume:
            optimizer = paddle.optimizer.SGD(g0, lr=0.01, momentum=0.937,
                nesterov=True)
            """Class Method: *.add_param_group, can not convert, please check whether it is torch.Tensor.*/Optimizer.*/nn.Module.*/torch.distributions.Distribution.*/torch.autograd.function.FunctionCtx.*/torch.profiler.profile.*/torch.autograd.profiler.profile.*, and convert manually"""
            optimizer.add_param_group({'params': g1, 'weight_decay': hyp[
                'weight_decay']})
            """Class Method: *.add_param_group, can not convert, please check whether it is torch.Tensor.*/Optimizer.*/nn.Module.*/torch.distributions.Distribution.*/torch.autograd.function.FunctionCtx.*/torch.profiler.profile.*/torch.autograd.profiler.profile.*, and convert manually"""
            optimizer.add_param_group({'params': g2})
            # NOTE(review): this message labels g1 as "(no decay)", whereas
            # the one logged at initial optimizer creation labels g0 so.
            LOGGER.info(
                f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups {len(g0)} weight, {len(g1)} weight (no decay), {len(g2)} bias"
                )
            del g0, g1, g2
            if opt.cos_lr:
                lf = lambda x, y=opt.epochs: ((1 + math.cos(x * math.pi / y
                    )) / 2) ** 1.0 * (1.0 - hyp['lrf']) + hyp['lrf']
                scheduler = CosineAnnealingWarmbootingLR(optimizer, epochs=
                    opt.epochs, steps=opt.cawb_steps, step_scale=0.7, lf=lf,
                    batchs=len(train_loader), warmup_epoch=hyp[
                    'warmup_epochs'], epoch_scale=4.0)
            else:
                lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp[
                    'lrf']
                tmp_lr = paddle.optimizer.lr.LambdaDecay(lr_lambda=lf,
                    learning_rate=optimizer.get_lr())
                optimizer.set_lr_scheduler(tmp_lr)
                scheduler = tmp_lr
        model.train()
        # Optional weighted image sampling: class weights are scaled by
        # (1 - maps) ** 2 and turned into per-image sampling weights.
        if opt.image_weights:
            cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc
            iw = labels_to_image_weights(dataset.labels, nc=nc,
                class_weights=cw)
            dataset.indices = random.choices(range(dataset.n), weights=iw,
                k=dataset.n)
        # Running mean of the three loss components (box, obj, cls columns
        # of the progress header below).
        mloss = paddle.zeros(shape=[3])
        if RANK != -1:
            train_loader.sampler.set_epoch(epoch)
        pbar = enumerate(train_loader)
        LOGGER.info(('\n' + '%10s' * 7) % ('Epoch', 'gpu_mem', 'box', 'obj',
            'cls', 'labels', 'img_size'))
        if RANK in [-1, 0]:
            pbar = tqdm(pbar, total=nb, bar_format=
                '{l_bar}{bar:10}{r_bar}{bar:-10b}')
        optimizer.clear_gradients(set_to_zero=False)
        for i, (imgs, targets, paths, _) in pbar:
            # Number of batches processed since the start of training.
            ni = i + nb * epoch
            # Images are cast to float32 and divided by 255.
            imgs = imgs.to(device, blocking=not True).astype(dtype='float32'
                ) / 255
            # Warmup: interpolate accumulation, per-group lr and momentum
            # over the first nw iterations.
            if ni <= nw:
                xi = [0, nw]
                accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]
                    ).round())
                # NOTE(review): `optimizer.param_groups` and the
                # 'initial_lr' key are PyTorch optimizer conventions -
                # confirm the Paddle optimizer exposes them.
                for j, x in enumerate(optimizer.param_groups):
                    # Group index 2 starts from warmup_bias_lr, the other
                    # groups from 0.
                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j ==
                        2 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [hyp[
                            'warmup_momentum'], hyp['momentum']])
            # Multi-scale: pick a random size (multiple of gs) and resize
            # the batch.
            if opt.multi_scale:
                # NOTE(review): randrange receives float bounds here, which
                # newer Python versions reject - confirm.
                sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs
                sf = sz / max(tuple(imgs.shape)[2:])
                if sf != 1:
                    ns = [(math.ceil(x * sf / gs) * gs) for x in tuple(imgs
                        .shape)[2:]]
                    imgs = paddle.nn.functional.interpolate(x=imgs, size=ns,
                        mode='bilinear', align_corners=False)
            # Forward pass and loss under automatic mixed precision.
            with paddle.amp.auto_cast(enable=cuda):
                pred = model(imgs)
                loss, loss_items = compute_loss(pred, targets.to(device))
                if RANK != -1:
                    loss *= WORLD_SIZE
                if opt.quad:
                    loss *= 4.0
            scaler.scale(loss).backward()
            # Step the optimizer once `accumulate` batches have been
            # accumulated.
            if ni - last_opt_step >= accumulate:
                scaler.step(optimizer)
                scaler.update()
                optimizer.clear_gradients(set_to_zero=False)
                if ema:
                    ema.update(model)
                last_opt_step = ni
            # Progress reporting and per-batch callbacks (rank -1/0 only).
            if RANK in [-1, 0]:
                mloss = (mloss * i + loss_items) / (i + 1)
                mem = (
                    f'{paddle.device.cuda.memory_reserved() / 1000000000.0 if paddle.device.cuda.device_count() >= 1 else 0:.3g}G'
                    )
                pbar.set_description(('%10s' * 2 + '%10.4g' * 5) % (
                    f'{epoch + 1}/{epochs}', mem, *mloss, tuple(targets.
                    shape)[0], tuple(imgs.shape)[-1]))
                callbacks.run('on_train_batch_end', ni, model, imgs,
                    targets, paths, plots, opt.sync_bn)
                # Early exit requested by a callback: returns None.
                if callbacks.stop_training:
                    return
        # Learning rates are captured for logging before the scheduler step.
        lr = [x['lr'] for x in optimizer.param_groups]
        scheduler.step()
        if RANK in [-1, 0]:
            callbacks.run('on_train_epoch_end', epoch=epoch)
            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names',
                'stride', 'class_weights'])
            final_epoch = epoch + 1 == epochs or stopper.possible_stop
            # Validate the EMA model every epoch unless --noval, and always
            # on the final epoch.
            if not noval or final_epoch:
                results, maps, _ = val.run(data_dict, batch_size=batch_size //
                    WORLD_SIZE * 2, imgsz=imgsz, model=ema.ema, single_cls=
                    single_cls, dataloader=val_loader, save_dir=save_dir,
                    plots=False, callbacks=callbacks, compute_loss=compute_loss
                    )
            # Scalar fitness of this epoch; track the best seen so far.
            fi = fitness(np.array(results).reshape(1, -1))
            if fi > best_fitness:
                best_fitness = fi
            log_vals = list(mloss) + list(results) + lr
            callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi
                )
            # Save when --nosave is off, or on the final epoch of a
            # non-evolve run (`and` binds tighter than `or`).
            if not nosave or final_epoch and not evolve:
                # NOTE(review): `.half()` is a PyTorch module method; other
                # places in this file use `.astype(dtype='float16')` -
                # confirm the model objects provide it.
                ckpt = {'epoch': epoch, 'best_fitness': best_fitness,
                    'model': deepcopy(de_parallel(model)).half(), 'ema':
                    deepcopy(ema.ema).half(), 'updates': ema.updates,
                    'optimizer': optimizer.state_dict(), 'wandb_id': 
                    loggers.wandb.wandb_run.id if loggers.wandb else None,
                    'date': datetime.now().isoformat()}
                paddle.save(obj=ckpt, path=last)
                if best_fitness == fi:
                    paddle.save(obj=ckpt, path=best)
                if (epoch > 0 and opt.save_period > 0 and epoch % opt.
                    save_period == 0):
                    paddle.save(obj=ckpt, path=w / f'epoch{epoch}.pt')
                del ckpt
                callbacks.run('on_model_save', last, epoch, final_epoch,
                    best_fitness, fi)
            # Early stopping is evaluated in single-process mode only.
            if RANK == -1 and stopper(epoch=epoch, fitness=fi):
                break
    # End of training: strip optimizer state from the saved checkpoints and
    # run a final validation on best.pt.
    if RANK in [-1, 0]:
        LOGGER.info(
            f"""
{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours."""
            )
        for f in (last, best):
            if f.exists():
                strip_optimizer(f)
                if f is best:
                    LOGGER.info(f'\nValidating {f}...')
                    results, _, _ = val.run(data_dict, batch_size=
                        batch_size // WORLD_SIZE * 2, imgsz=imgsz, model=
                        attempt_load(f, device).half(), iou_thres=0.65 if
                        is_coco else 0.6, single_cls=single_cls, dataloader
                        =val_loader, save_dir=save_dir, save_json=is_coco,
                        verbose=True, plots=True, callbacks=callbacks,
                        compute_loss=compute_loss)
                    if is_coco:
                        callbacks.run('on_fit_epoch_end', list(mloss) +
                            list(results) + lr, epoch, best_fitness, fi)
        callbacks.run('on_train_end', last, best, plots, epoch, results)
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")
    paddle.device.cuda.empty_cache()
    return results


def parse_opt(known=False):
    """Build the command-line parser for this script and parse ``sys.argv``.

    Args:
        known: when true, unrecognised arguments are ignored
            (``parse_known_args``); otherwise they are an error
            (``parse_args``).

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument

    # Model, data and hyperparameter files. The defaults are Path objects
    # built from ROOT; argparse applies ``type`` only to string values.
    add('--weights', type=str, default=ROOT / '',
        help='initial weights path')
    add('--cfg', type=str,
        default=ROOT / 'models/yolov5s-mobilenetV2improved.yaml',
        help='model.yaml path')
    add('--data', type=str, default=ROOT / 'data/VOC.yaml',
        help='dataset.yaml path')
    add('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch.yaml',
        help='hyperparameters path')

    # Core training settings.
    add('--epochs', type=int, default=500)
    add('--batch-size', type=int, default=10,
        help='total batch size for all GPUs, -1 for autobatch')
    add('--imgsz', '--img', '--img-size', type=int, default=1024,
        help='train, val image size (pixels)')
    add('--rect', action='store_true', help='rectangular training')
    add('--resume', nargs='?', const=True, default=False,
        help='resume most recent training')
    add('--nosave', action='store_true', help='only save final checkpoint')
    add('--noval', action='store_true', help='only validate final epoch')
    add('--noautoanchor', action='store_true', help='disable AutoAnchor')
    add('--evolve', type=int, default=False, nargs='?', const=300,
        help='evolve hyperparameters for x generations')
    add('--bucket', type=str, default='', help='gsutil bucket')
    # NOTE: a bare ``--cache`` yields 'disk' (the const), although the help
    # text below names "ram" as the default.
    add('--cache', type=str, nargs='?', const='disk',
        help='--cache images in "ram" (default) or "disk"')
    add('--image-weights', action='store_true',
        help='use weighted image selection for training')
    add('--device', default='0',
        help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    add('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
    add('--single-cls', action='store_true',
        help='train multi-class data as single-class')
    add('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'],
        default='AdamW', help='optimizer')
    add('--sync-bn', action='store_true',
        help='use SyncBatchNorm, only available in DDP mode')
    add('--workers', type=int, default=8,
        help='max dataloader workers (per RANK in DDP mode)')

    # Output location.
    add('--project', default=ROOT / 'runs/train',
        help='save to project/name')
    add('--name', default='yolov5s-mobilenetV2improved-jingshan20220816',
        help='save to project/name')
    add('--exist-ok', action='store_true',
        help='existing project/name ok, do not increment')

    add('--quad', action='store_true', help='quad dataloader')
    # NOTE: store_true combined with default=True means this option is
    # always True, whether or not the flag is passed.
    add('--cos-lr', default=True, action='store_true',
        help='cosine LR scheduler')
    add('--label-smoothing', type=float, default=0.3,
        help='Label smoothing epsilon')
    add('--patience', type=int, default=50,
        help='EarlyStopping patience (epochs without improvement)')
    add('--freeze', nargs='+', type=int, default=[0],
        help='Freeze layers: backbone=10, first3=0 1 2')
    add('--save-period', type=int, default=-1,
        help='Save checkpoint every x epochs (disabled if < 1)')
    add('--local_rank', type=int, default=-1,
        help='DDP parameter, do not modify')
    # Default steps are 25, 75, ..., 475.
    add('--cawb_steps', nargs='+', type=int,
        default=list(range(25, 500, 50)),
        help='the cawb learning rate scheduler steps')

    # Weights & Biases options.
    add('--entity', default=None, help='W&B: Entity')
    add('--upload_dataset', nargs='?', const=True, default=False,
        help='W&B: Upload data, "val" option')
    add('--bbox_interval', type=int, default=-1,
        help='W&B: Set bounding-box image logging interval')
    add('--artifact_alias', type=str, default='latest',
        help='W&B: Version of dataset artifact to use')

    if known:
        namespace, _unrecognised = parser.parse_known_args()
        return namespace
    return parser.parse_args()


def main(opt, callbacks=None):
    """Validate the options, pick the device, then train or evolve.

    Args:
        opt: option namespace as produced by ``parse_opt``. When resuming it
            is replaced by the namespace stored in the run's ``opt.yaml``.
        callbacks: callbacks object handed to ``train``. A new ``Callbacks()``
            is created when omitted, so separate calls never share one
            instance.

    Depending on ``opt.evolve`` this either runs a single ``train`` call or
    runs ``opt.evolve`` generations of hyperparameter mutation, each followed
    by a training run.
    """
    # Build the default per call instead of in the signature: a default
    # created at definition time would be shared (and keep accumulating
    # registered logger actions) across every call to main().
    if callbacks is None:
        callbacks = Callbacks()
    # Environment checks are done once, on the main process only.
    if RANK in [-1, 0]:
        print_args(FILE.stem, opt)
        check_git_status()
        check_requirements(exclude=['thop'])
    if opt.resume and not check_wandb_resume(opt) and not opt.evolve:
        # Resume: use the given checkpoint path, or the most recent run.
        ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run()
        assert os.path.isfile(ckpt
            ), 'ERROR: --resume checkpoint does not exist'
        # Restore the options saved alongside the run (two levels above the
        # checkpoint file).
        with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f:
            opt = argparse.Namespace(**yaml.safe_load(f))
        opt.cfg, opt.weights, opt.resume = '', ckpt, True
        LOGGER.info(f'Resuming training from {ckpt}')
    else:
        # Fresh run: resolve file arguments and normalise paths to str.
        opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = check_file(opt
            .data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights
            ), str(opt.project)
        assert len(opt.cfg) or len(opt.weights
            ), 'either --cfg or --weights must be specified'
        if opt.evolve:
            # Evolution results go to runs/evolve unless a project was given.
            if opt.project == str(ROOT / 'runs/train'):
                opt.project = str(ROOT / 'runs/evolve')
            # --resume doubles as exist_ok while evolving.
            opt.exist_ok, opt.resume = opt.resume, False
        opt.save_dir = str(increment_path(Path(opt.project) / opt.name,
            exist_ok=opt.exist_ok))
    # The CLI option is `--device`, so the namespace attribute is `device`;
    # no `place` attribute is ever defined on opt.
    device = select_device(opt.device, batch_size=opt.batch_size)
    if LOCAL_RANK != -1:
        # Distributed launch: reject incompatible options, bind this process
        # to its GPU and initialise the parallel environment.
        msg = 'is not compatible with YOLOv5 Multi-GPU DDP training'
        assert not opt.image_weights, f'--image-weights {msg}'
        assert not opt.evolve, f'--evolve {msg}'
        assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size'
        assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE'
        assert paddle.device.cuda.device_count(
            ) > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
        paddle.device.set_device(device='gpu:' + str(LOCAL_RANK) if
            isinstance(LOCAL_RANK, int) else str(LOCAL_RANK).replace('cuda',
            'gpu'))
        device = paddle.CUDAPlace(LOCAL_RANK)
        paddle.distributed.init_parallel_env()
    if not opt.evolve:
        # Plain training run.
        train(opt.hyp, opt, device, callbacks)
        if WORLD_SIZE > 1 and RANK == 0:
            LOGGER.info('Destroying process group... ')
            paddle.distributed.destroy_process_group()
    else:
        # Hyperparameter evolution. Each meta entry is
        # (mutation gain, lower limit, upper limit): the gain scales the
        # mutation below and the limits clamp the resulting value.
        meta = {'lr0': (1, 1e-05, 0.1), 'lrf': (1, 0.01, 1.0), 'momentum':
            (0.3, 0.6, 0.98), 'weight_decay': (1, 0.0, 0.001),
            'warmup_epochs': (1, 0.0, 5.0), 'warmup_momentum': (1, 0.0, 
            0.95), 'warmup_bias_lr': (1, 0.0, 0.2), 'box': (1, 0.02, 0.2),
            'cls': (1, 0.2, 4.0), 'cls_pw': (1, 0.5, 2.0), 'obj': (1, 0.2, 
            4.0), 'obj_pw': (1, 0.5, 2.0), 'iou_t': (0, 0.1, 0.7),
            'anchor_t': (1, 2.0, 8.0), 'anchors': (2, 2.0, 10.0),
            'fl_gamma': (0, 0.0, 2.0), 'hsv_h': (1, 0.0, 0.1), 'hsv_s': (1,
            0.0, 0.9), 'hsv_v': (1, 0.0, 0.9), 'degrees': (1, 0.0, 45.0),
            'translate': (1, 0.0, 0.9), 'scale': (1, 0.0, 0.9), 'shear': (1,
            0.0, 10.0), 'perspective': (0, 0.0, 0.001), 'flipud': (1, 0.0, 
            1.0), 'fliplr': (0, 0.0, 1.0), 'mosaic': (1, 0.0, 1.0), 'mixup':
            (1, 0.0, 1.0), 'copy_paste': (1, 0.0, 1.0)}
        with open(opt.hyp, errors='ignore') as f:
            hyp = yaml.safe_load(f)
            if 'anchors' not in hyp:
                hyp['anchors'] = 3
        # Evolution only validates and saves at the final epoch of each run.
        opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir)
        evolve_yaml, evolve_csv = (save_dir / 'hyp_evolve.yaml', save_dir /
            'evolve.csv')
        if opt.bucket:
            # NOTE(review): opt.bucket is interpolated into a shell command.
            os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {evolve_csv}')
        for _ in range(opt.evolve):
            if evolve_csv.exists():
                # Select a parent from the (up to) five fittest previous rows.
                parent = 'single'
                x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1)
                n = min(5, len(x))
                x = x[np.argsort(-fitness(x))][:n]
                # Selection weights; the small offset keeps them positive.
                w = fitness(x) - fitness(x).min() + 1e-06
                if parent == 'single' or len(x) == 1:
                    x = x[random.choices(range(n), weights=w)[0]]
                elif parent == 'weighted':
                    x = (x * w.reshape(n, 1)).sum(axis=0) / w.sum()
                # Mutate: mp is the per-gene mutation probability, s the
                # spread of the random factor.
                mp, s = 0.8, 0.2
                npr = np.random
                npr.seed(int(time.time()))
                g = np.array([meta[k][0] for k in hyp.keys()])
                ng = len(meta)
                v = np.ones(ng)
                # Redraw until at least one gene actually changes.
                while all(v == 1):
                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.
                        random() * s + 1).clip(0.3, 3.0)
                # The hyperparameter values start at column 7 of the CSV row.
                for i, k in enumerate(hyp.keys()):
                    hyp[k] = float(x[i + 7] * v[i])
            # Clamp every hyperparameter to its limits and round it.
            for k, v in meta.items():
                hyp[k] = max(hyp[k], v[1])
                hyp[k] = min(hyp[k], v[2])
                hyp[k] = round(hyp[k], 5)
            results = train(hyp.copy(), opt, device, callbacks)
            # Fresh callbacks for the next generation.
            callbacks = Callbacks()
            print_mutation(results, hyp.copy(), save_dir, opt.bucket)
        plot_evolve(evolve_csv)
        LOGGER.info(
            f"""Hyperparameter evolution finished {opt.evolve} generations
Results saved to {colorstr('bold', save_dir)}
Usage example: $ python train.py --hyp {evolve_yaml}"""
            )


def run(**kwargs):
    """Programmatic entry point: parse options, apply overrides, run ``main``.

    Options are parsed with unknown arguments ignored; every keyword argument
    is then set as an attribute on the namespace, overriding any parsed value.

    Returns:
        The option namespace that was passed to ``main``.
    """
    opt = parse_opt(known=True)
    for option_name in kwargs:
        setattr(opt, option_name, kwargs[option_name])
    main(opt)
    return opt


# Script entry point: parse the command line strictly (unknown arguments are
# an error) and start training/evolution. `opt` stays bound at module level.
if __name__ == '__main__':
    opt = parse_opt()
    main(opt)
